GPS loggers are not the same for successful and failed breeders so they have to be processed differently before being merged. Moreover, data from 2015 and 2018 have different formats and different constraints so they are also processed separately. This file aims at formatting the data and compare results for 2018 when cleaning trips based on different threshold parameters.
Loggers deployed on successful breeders are IgotU GPS. They give time in UTC GMT+2 so 3h have to be added to get UTC local time (UTC=GMT+5). They record locations every 5 minutes. They have a duty cycle of 17h on/7h off, providing data from 5am to 22pm UTC. Note that one successful breeder (BAR07,first file listed in directory) was tracked with a GPS-UHF logger and is processed with failed breeders. Two successful breeders (CB9, CC1) failed during tracking, losing their young chick so all data after failure were removed.
library(fields)
library(maptools)
library(maps)
library(stringr)
library(xlsx)
library(plotrix)
library(pastecs)
library(simr)
library(kableExtra)
library(plyr)
library(lme4)
library(afex)
source("R scripts/functions.R")
#Coordinates of the colony and date limit for the tracking study
colony<-data.frame(Longitude =77.52356,Latitude=-37.8545)
date.max<-as.POSIXct("2018-12-26 12:00:00", format="%F %H:%M:%S",tz="UTC")
#import of successful breeders
files.suc<-list.files("Data/RawData/Breeders_2018")
path.suc<-"Data/RawData/Breeders_2018/"
success.2018<-NULL
#Limit data before failure for individuals that have lost chick during tracking
fail.lim<-c(as.POSIXct("2018-12-14 10:00:00", format="%F %H:%M:%S",tz="UTC"),
as.POSIXct("2018-12-06 12:00:00", format="%F %H:%M:%S",tz="UTC"))
#first file for successful breeder is with GPS-UHF logger so treated in failed breeders loop
for ( a in 2:length(files.suc)){
temp.suc.2018<-read.csv2(paste(path.suc,files.suc[a],sep=""),as.is=T,
header=T,sep=",",fill=T)
temp.suc.2018$DateTime<-paste(temp.suc.2018$Date,temp.suc.2018$Time,sep= " ")
temp.suc.2018$DateTime <-as.POSIXct(strptime(temp.suc.2018$DateTime,format="%Y/%m/%d %H:%M:%S"),
format="%F %H:%M:%S",tz="UTC")
temp.suc.2018$DateTime <-temp.suc.2018$DateTime + 3*3600 #get local time
#extract ID from file name
temp.suc.2018$ID<- str_sub(files.suc[a],start=11,end=13)
temp.suc.2018$ID<-as.factor(temp.suc.2018$ID)
#for individuals which have failed during tracking, remove dates after failure
if(temp.suc.2018$ID[1]=="CB9"){
temp.suc.2018<-temp.suc.2018[temp.suc.2018$DateTime<fail.lim[1],]}
if(temp.suc.2018$ID[1]=="CC1"){
temp.suc.2018<-temp.suc.2018[temp.suc.2018$DateTime<fail.lim[2],]}
temp.suc.2018<-temp.suc.2018[!duplicated(temp.suc.2018$DateTime)==T,]
temp.suc.2018$Status<-as.factor("success")
temp.suc.2018$Longitude<-as.numeric(temp.suc.2018$Longitude)
temp.suc.2018$Latitude<-as.numeric(temp.suc.2018$Latitude)
#distance between location and colony
temp.suc.2018$Distmax<-NA
temp.suc.2018$Distmax<-as.vector(rdist.earth(temp.suc.2018[,c("Longitude","Latitude")],
colony[1,c("Longitude","Latitude")],miles=F))
#calculate distance between 2 consecutive points and time elapsed between 2 consecutive points
temp.suc.2018$Distadj<-0
for ( n in 2:nrow(temp.suc.2018)){
temp.suc.2018$Distadj[n]<-rdist.earth(temp.suc.2018[n,c("Longitude","Latitude")],
temp.suc.2018[n-1,c("Longitude","Latitude")],miles=F)
temp.suc.2018$Difftime<-c(0,difftime(temp.suc.2018$DateTime[2:nrow(temp.suc.2018)],
temp.suc.2018$DateTime[1:nrow(temp.suc.2018)-1],units="min"))
}
temp.suc.2018<-temp.suc.2018[,c("ID","Status","DateTime","Longitude","Latitude",
"Distmax","Difftime","Distadj")]
success.2018<-rbind(success.2018,temp.suc.2018)
}
Loggers deployed in failed breeders are GPS-UHF Ecotone GPS. They give time in GMT so 5h have to be added to get local time (UTC=GMT+5). Note that two loggers on failed breeders had the same ID HAR01 so trips of 2 individuals have been recorded in the same file with the same ID. They have been separated manually based on locations. One successful breeder (BAR07,first file listed in Non-breeders directory) was tracked with a GPS-UHF logger and is processed with failed breeders.
##Import of failed birds (GPS-UHF)
files.fail<-list.files("Data/RawData/Non_Breeders_2018")
path.fail<-"Data/RawData/Non_Breeders_2018/"
fail.2018<-NULL
for ( a in 1:length(files.fail)){
temp.fail.2018<-read.csv2(paste(path.fail,files.fail[a],sep=""),as.is=T,
header=T,sep=";",fill=T)
temp.fail.2018$DateTime<-paste(temp.fail.2018$Year,"-", temp.fail.2018$Month,"-",
temp.fail.2018$Day, " ", temp.fail.2018$Hour, ":",
temp.fail.2018$Minute,":", temp.fail.2018$Second,sep="")
temp.fail.2018$DateTime <-as.POSIXct(strptime(temp.fail.2018$DateTime,format="%F %H:%M:%S"),
format="%F %H:%M:%S",tz="UTC")
temp.fail.2018$DateTime <- temp.fail.2018$DateTime + 5*3600
temp.fail.2018<-temp.fail.2018[!duplicated(temp.fail.2018$DateTime)==T,]
temp.fail.2018$Longitude<-as.numeric(temp.fail.2018$Longitude)
temp.fail.2018$Latitude<-as.numeric(temp.fail.2018$Latitude)
#Assign colony coordinates when in range
temp.fail.2018$Longitude[which(is.na(temp.fail.2018$Longitude)==T &
temp.fail.2018$In.range==1)]<-colony$Longitude
temp.fail.2018$Latitude[which(is.na(temp.fail.2018$Latitude)==T &
temp.fail.2018$In.range==1)]<-colony$Latitude
temp.fail.2018<-temp.fail.2018[!is.na(temp.fail.2018$Latitude)==T,]
#Discriminate the 2 individuals with same tag ID
if(files.fail[a]=="HAR01_YNA.csv"){
idd<-read.xlsx("Data/RawData/HAR01_double_2018.xlsx",
sheetIndex=1, header=TRUE,colIndex=c(4,11))
temp.fail.2018$id<-idd$id
#when "in.range" is true, locations are set to colony coordinates
temp.fail.2018$Logger.ID<-ifelse(temp.fail.2018$id=="1",temp.fail.2018$Logger.ID<-"HAR11",
temp.fail.2018$Logger.ID<-"HAR12")
temp.fail.2018<-temp.fail.2018[order(temp.fail.2018[,2],temp.fail.2018[,1]),-29]
}
#Discriminate the successful breeder
temp.fail.2018$Status<-ifelse(temp.fail.2018$Logger.ID=="BAR07", temp.fail.2018$Status<-"success",
temp.fail.2018$Status<- "fail")
temp.fail.2018$Status<-as.factor(temp.fail.2018$Status)
#distance between location and colony
temp.fail.2018$Distmax<-NA
temp.fail.2018$Distmax<-as.vector(rdist.earth(temp.fail.2018[,c("Longitude","Latitude")],
colony[1,c("Longitude","Latitude")],miles=F))
temp.fail.2018$Distmax[temp.fail.2018$In.range==1]<-0
names(temp.fail.2018)[1]<-"ID"
temp.fail.2018$ID<-as.factor(temp.fail.2018$ID)
#calculate distance between 2 consecutive points and time elapsed between 2 consecutive points
temp.fail.2018$Distadj<-0
for ( n in 2:nrow(temp.fail.2018)){
temp.fail.2018$Distadj[n]<-rdist.earth(temp.fail.2018[n,c("Longitude","Latitude")],
temp.fail.2018[n-1,c("Longitude","Latitude")],miles=F)
temp.fail.2018$Difftime<-c(0,difftime(temp.fail.2018$DateTime[2:nrow(temp.fail.2018)],
temp.fail.2018$DateTime[1:nrow(temp.fail.2018)-1],units="min"))
}
temp.fail.2018<-temp.fail.2018[,c("ID","Status","DateTime","Longitude","Latitude","Distmax","Difftime","Distadj")]
fail.2018<-rbind(fail.2018,temp.fail.2018)
}
fail.2018$Difftime[which(fail.2018$Difftime<0)]<-0
fail.2018<-subset(fail.2018,fail.2018$DateTime < date.max)
Parameters to define a trip are based on distance to the colony, number of consecutive locations out of the colony and total duration of the trip. Apart from 7h gaps due to duty cycles, no time gaps >10min are recorded in successful breeder tracks (the ones observed in previous plot occur at the colony). On the other side, data for failed breeders have more large time gaps.Therefore, a time gap threshold of 8h is applied on successful breeders. To better determine the effect of time gaps on the selection of trips in failed breeders, different time gap thresholds are tested and compared with the one of failed breeders to homogenize thresholds.
dataset.2018<-rbind(success.2018,fail.2018)
dataset.2018<-dataset.2018[duplicated(dataset.2018$DateTime)==FALSE,] # remove duplicated points with the same date and time
datasetTravelNb<-"NA"
dataset.2018$PathLength<-0
##Parameters to define a trip in 2018
dist.thres<-1 #distance threshold (in km)
last.dist<-10 #distance threshold (in km) when having only one location between 2 trips
row.thres<- 12 #number of rows constituting a trip
dur.thres<-120 #min duration of a trip
#Define trip number within each individual
ALL.2018<-define_trips(dataset.2018) #function from source file
ALL.2018$TravelID<-paste(ALL.2018$ID,ALL.2018$TravelNb,sep=".")
save(ALL.2018,file="Data/NewlyCreatedData/raw_trips_2018.RData")
#summarize raw trips by individuals or status. There is no time gap thresholds here, just total duration and distance
dist.max.all.trips.2018.status<-raw_trips_summary_status(ALL.2018) #function from source file
#clean trips and give summary by individuals or status. Several time gap thresholds are tested
fail<-subset(ALL.2018,ALL.2018$Status=="fail")
#10h gaps
diff.thres<-10*60
trips10h<-clean_trips_summary_status(ALL.2018,diff.thres) #function from source file
#8h gaps
diff.thres<-8*60
trips8h<-clean_trips_summary_status(ALL.2018,diff.thres) #function from source file
#6h gaps
diff.thres<-6*60
trips6h<-clean_trips_summary_status(fail,diff.thres) #function from source file
#5h gaps
diff.thres<-5*60
trips5h<-clean_trips_summary_status(fail,diff.thres) #function from source file
#4h gaps
diff.thres<-4*60
trips4h<-clean_trips_summary_status(fail,diff.thres) #function from source file
#3h gaps
diff.thres<-3*60
trips3h<-clean_trips_summary_status(fail,diff.thres) #function from source file
The first table shows the summary of trip characteristics with raw data (without any time gap threshold). The others show the summary of trip characteristics with different time gap thresholds. Note that successful breeders are not included in tests <8h because trips have to be retained so that the rest of the trip outside off cycle can be properly interpolated.
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SETotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| success | 9 | 19 | 375.7320 | 23.63303 | 232.97788 | 630.7741 | 58.20822 | 3.452602 | 30.847500 | 88.54556 | 1057.648 | 93.42379 | 578.37833 | 2185.241 |
| fail | 14 | 42 | 392.7061 | 52.04421 | 23.53467 | 1406.2726 | 90.70038 | 14.302777 | 5.453333 | 413.06778 | 1063.065 | 171.15263 | 59.27427 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| success | 9 | 18 | 376.4886 | 24.97175 | 232.97788 | 630.7741 | 58.73886 | 3.606683 | 30.847500 | 88.54556 | 1062.657 | 98.62446 | 578.37833 | 2185.241 |
| fail | 13 | 39 | 381.0538 | 53.02511 | 32.59453 | 1406.2726 | 88.27746 | 14.877474 | 8.966667 | 413.06778 | 1082.234 | 182.07889 | 64.19188 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| success | 9 | 18 | 376.4886 | 24.97175 | 232.97788 | 630.7741 | 58.73886 | 3.606683 | 30.847500 | 88.54556 | 1062.657 | 98.62446 | 578.37833 | 2185.241 |
| fail | 13 | 38 | 372.4466 | 53.71737 | 32.59453 | 1406.2726 | 86.51770 | 15.167048 | 8.966667 | 413.06778 | 1081.725 | 186.93445 | 64.19188 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| fail | 12 | 35 | 370.1589 | 58.04865 | 32.59453 | 1406.273 | 81.14568 | 16.08053 | 8.966667 | 413.0678 | 1072.182 | 199.8138 | 64.19188 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| fail | 12 | 32 | 381.2155 | 62.81695 | 32.59453 | 1406.273 | 85.26451 | 17.42591 | 8.966667 | 413.0678 | 1141.311 | 214.3825 | 64.19188 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| fail | 11 | 31 | 386.0655 | 64.68341 | 32.59453 | 1406.273 | 87.0755 | 17.89995 | 8.966667 | 413.0678 | 1169.7 | 219.4634 | 64.19188 | 4775.886 |
| Status | NbInd | NbTravel | MeanDist | SEDist | MinDist | MaxDist | MeanDur | SEDur | MinTripDur | MaxTripDur | MeanTotPath | SEtotPath | MinTotPath | MaxTotPath |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| fail | 10 | 23 | 308.7396 | 65.74854 | 32.59453 | 968.9207 | 67.6774 | 19.98415 | 8.966667 | 413.0678 | 998.66 | 249.7139 | 64.19188 | 4775.886 |
In the plots, all black dots represent trips/data that have been excluded. All colored trips are the selected ones.
#clean trips with the chosen time gap threshold and give locations of selected trips
diff.thres<- 8*60
clean.trips.suc.2018<-clean_trips_locations(subset(ALL.2018,ALL.2018$Status=="success"),diff.thres)
diff.thres<- 5*60
clean.trips.fail.2018<-clean_trips_locations(fail,diff.thres)
clean.trips.loc.2018<-rbind(clean.trips.suc.2018,clean.trips.fail.2018)
#Number of gaps > 16min
length(which(clean.trips.fail.2018$Difftime>16 ))/nrow(clean.trips.fail.2018)*100
## [1] 11.96503
#Number of gaps > 20min
length(which(clean.trips.loc.2018$Difftime>20))/nrow(clean.trips.loc.2018)*100
## [1] 5.791855
#Number of gaps > 30min
length(which(clean.trips.loc.2018$Difftime>30))/nrow(clean.trips.loc.2018)*100
## [1] 5.369532
reso <- 15 * 60 #time resolution between 2 points in seconds
new.trip<-NULL
loc.interpolated.2018<-NULL
id<-unique(clean.trips.loc.2018$ID)
#interpolate data for each individual separately
for (x in 1:length(id)){
sub<-subset(clean.trips.loc.2018,clean.trips.loc.2018$ID==id[x])
trip<-unique(sub$TravelNb)
#interpolate data for each trip separately
for (y in 1:length(trip)){
sub.trip<-subset(sub,sub$TravelNb==trip[y])
sub.trip$TimeSinceOrigin<-rep(0,nrow(sub.trip))
for (z in 1:nrow(sub.trip)){
sub.trip$TimeSinceOrigin[z]<-difftime(sub.trip$DateTime[z],sub.trip$DateTime[1],units="sec")
}
# Resampling of long and lat with regular time intervals
sub.lat1 <- regul(x=sub.trip$TimeSinceOrigin, y=sub.trip$Latitude,
n=round((max(sub.trip$TimeSinceOrigin)/reso),0),
deltat=reso, methods="linear",xmin=0,units="sec")
sub.lon1 <- regul(x=sub.trip$TimeSinceOrigin, y=sub.trip$Longitude,
n=round((max(sub.trip$TimeSinceOrigin)/reso),0),
deltat=reso, methods="linear",xmin=0,units="sec")
new.sub <- data.frame(Longitude=sub.lon1[[2]]$Series, Latitude=sub.lat1[[2]]$Series, Time=as.vector(sub.lon1[[1]]),
DateTime=sub.trip$DateTime[1]+as.vector(sub.lon1[[1]]),TravelNb=trip[y])
#Recreate a dataframe with individual and trip characteristics
new.sub$ID<-id[x]
new.sub$Status<-rep(unique(sub$Status),nrow(new.sub))
new.sub$Distmax<-as.vector(rdist.earth(new.sub[,c("Longitude","Latitude")],
colony[1,c("Longitude","Latitude")],miles=F))
new.sub$Difftime<-c(0,difftime(new.sub$DateTime[2:nrow(new.sub)],new.sub$DateTime[1:nrow(new.sub)-1],units="mins"))
new.sub$Distadj<-0
new.sub$TravelID<-paste(new.sub$ID,new.sub$TravelNb,sep=".")
for (z in 2:nrow(new.sub)){
new.sub$Distadj[z]<-rdist.earth(new.sub[z,c("Longitude","Latitude")],
new.sub[z-1,c("Longitude","Latitude")], miles=F)
}
new.trip<-rbind(new.trip,new.sub)
}
}
#create a new global dataset with interpolated data
loc.interpolated.2018<-new.trip[,c("ID","Status","TravelNb","TravelID","DateTime","Longitude","Latitude",
"Distmax","Difftime","Distadj")]
save(loc.interpolated.2018,file="Data/NewlyCreatedData/clean_interp_loc_2018.RData")
raw.trips.2018<-raw_trips_summary_ind(ALL.2018)
Linear mixed models are applied to test whether the trip variables are different between failed and successful breeders on the raw trips
maxdist<-lmer(sqrt(Distmax) ~ Status + (1|ID),data=raw.trips.2018)
dur<-lmer(sqrt(TripDur) ~ Status + (1|ID),data=raw.trips.2018)
totdist<-lmer(sqrt(TotalPath) ~ Status + (1|ID),data=raw.trips.2018)
anova(maxdist)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 30.222 30.222 1 59 0.5155 0.4756
anova(dur)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 7.5893 7.5893 1 59 0.4899 0.4867
anova(totdist)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 186.53 186.53 1 59 0.9328 0.3381
#Power analysis based on Simr package